
#################################################################################################

#  Description: Generating measures of procedural change presented in Fleming, Thomas G. 
#               (Forthcoming) 'Parliamentary Procedure under Theresa May: Nothing Has Changed?', 
#               Parliamentary Affairs.
#  Author:      Thomas G. Fleming
#  Date:        30/05/2020
#  Note:        "ukhcso_parlrules.csv" is the ParlRulesData Project (2020) 'UK House of Commons
#               Standing Orders Dataset', Version 1.2.0. Downloaded from www.parlrulesdata.org.
#               The code requires this file to be downloaded and saved in the same folder as this script.

#################################################################################################

# Clears the workspace so the script does not pick up objects left over from earlier work
rm(list = ls())

library(tidyr)                                    # for spread()

library(rstudioapi)                               # used to locate the folder containing this script

# Sets the working directory to the folder containing this script, so that the relative
# file paths used below resolve next to it. The RStudio API can only be queried from inside
# RStudio, and it reports an empty path for a document that has never been saved; in either
# case the working directory is left as it is (e.g. when the script is run with Rscript from
# its own folder) instead of stopping with an error.
if (rstudioapi::isAvailable()) {
  current_path <- getActiveDocumentContext()$path
  if (nzchar(current_path)) {
    setwd(dirname(current_path))
  }
}

##################################
# Loading and formatting SO data #
##################################

# Reads the SO data in long format, keeps the three columns needed, and reshapes to wide
# format: one row per root number, one column per adoption date, cells holding the SO text

  so.long <- read.csv("./ukhcso_parlrules.csv")
  so.long <- so.long[c("root_number", "adoption_date", "SO.text")]
  so.data <- spread(so.long, key = adoption_date, value = SO.text)

# Keeps only the version of the SOs in force at the end of each premiership (which is also
# the version at the start of the next). A date listed twice yields two identical columns,
# i.e. no new version was adopted during that premiership.

  snapshot.dates <- c(
    "1945-03-09",    # SOs at start of Attlee
    "1950-11-20",    # SOs at end of Attlee / start of Churchill
    "1954-11-03",    # SOs at end of Churchill / start of Eden
    "1954-11-03",    # SOs at end of Eden / start of Macmillan
    "1963-08-01",    # SOs at end of Macmillan / start of Douglas-Home
    "1963-08-01",    # SOs at end of Douglas-Home / start of Wilson
    "1969-10-21",    # SOs at end of Wilson / start of Heath
    "1973-11-20",    # SOs at end of Heath / start of Wilson
    "1976-02-24",    # SOs at end of Wilson / start of Callaghan
    "1977-02-02",    # SOs at end of Callaghan / start of Thatcher
    "1990-10-25",    # SOs at end of Thatcher / start of Major
    "1997-03-20",    # SOs at end of Major / start of Blair
    "2007-03-28",    # SOs at end of Blair / start of Brown
    "2010-03-04",    # SOs at end of Brown / start of Cameron (Coalition)
    "2015-03-17",    # SOs at end of Cameron (Coalition) / start of Cameron (Con)
    "2015-10-28",    # SOs at end of Cameron (Con) / start of May
    "2019-01-07"     # SOs at end of May / start of Johnson
  )

  so.data <- so.data[snapshot.dates]

#######################################################
# Pre-processing text to remove certain types of text #
#######################################################
  
# Cleans every column of so.data in a single pass. Each column is converted to character
# and then, in this order:
#   1. converted to lower case
#   2. stripped of punctuation
#   3. stripped of numbers
# so that later comparisons ignore differences in case, punctuation and numbering.

  so.data[] <- lapply(so.data, function(snapshot) {
    cleaned <- tolower(as.character(snapshot))
    cleaned <- gsub("[[:punct:]]", "", cleaned)
    gsub("[[:digit:]]", "", cleaned)
  })
  
#########################################################################
# Calculating the change between the SOs at the end of each PM's tenure #
#########################################################################

# Sets up a placeholder data frame with one row per row of so.data and one column per PM
# (one fewer column than so.data, since each PM is described by a pair of adjacent columns)

  PMs.list <- c("Attlee", "Churchill", "Eden", "Macmillan", "Douglas-Home", "Wilson I", "Heath", "Wilson II",
                "Callaghan", "Thatcher", "Major", "Blair", "Brown", "Cameron (Coal)", "Cameron (Con)", "May")
  change <- data.frame(matrix(nrow = nrow(so.data), ncol = ncol(so.data) - 1))
  names(change) <- PMs.list

# Classifies each row for each PM by comparing the column for the start of the premiership
# with the column for its end:
#   ""           text missing at both points
#   "Added"      text missing at the start but present at the end
#   "Deleted"    text present at the start but missing at the end
#   "Changed"    text present at both points and different
#   "Unchanged"  text present at both points and identical

  for (pm in seq_along(change)) {
    at.start <- so.data[[pm]]
    at.end <- so.data[[pm + 1]]
    missing.start <- is.na(at.start)
    missing.end <- is.na(at.end)

    # The comparison is NA wherever either text is missing; those rows are filled in below
    status <- ifelse(as.character(at.start) == as.character(at.end), "Unchanged", "Changed")
    status[missing.start & missing.end] <- ""
    status[missing.start & !missing.end] <- "Added"
    status[!missing.start & missing.end] <- "Deleted"

    change[[pm]] <- status
  }
  
# Builds one summary row per PM, counting how many rows of 'change' carry each status
# label during that PM's tenure ("" is reported as 'unused')

  pm.data <- as.data.frame(PMs.list)

  tally.status <- function(label) {
    vapply(seq_len(nrow(pm.data)), function(pm) sum(change[, pm] == label), integer(1))
  }

  pm.data$added <- tally.status("Added")
  pm.data$deleted <- tally.status("Deleted")
  pm.data$changed <- tally.status("Changed")
  pm.data$unchanged <- tally.status("Unchanged")
  pm.data$unused <- tally.status("")

# Counts the non-missing texts in the column for the start of each PM's tenure

  pm.data$starting.total <- vapply(seq_len(nrow(pm.data)),
                                   function(pm) sum(!is.na(so.data[, pm])),
                                   integer(1))

###############################
# Exporting data as .csv file #
###############################

  write.csv(pm.data, file = "./so_change_data.csv", row.names = FALSE)

##########################################################################################

# Removes every object created by this script from the workspace
rm(list = ls())
